* Start from a clean session: drop data, programs and every macro left over
* from earlier runs, and do not pause long output.
clear all
macro drop _all
set more off

*ssc install excelcol
* NOTE: the expenditure sections call winsor2, a user-written command
* (ssc install winsor2); it must be installed before running this file.

********************************************************************************
*				WORKING DIRECTORIES 
********************************************************************************

* Project root inside the current user's Dropbox folder; its location depends on the OS
if ("`c(os)'"=="Windows") global ROOT "C:/Users/`c(username)'/Dropbox/PhD Maria Ulugbek Mapping indiv poverty" //Windows
if ("`c(os)'"=="MacOSX") global ROOT "/Users/`c(username)'/Dropbox/PhD Maria Ulugbek Mapping indiv poverty" //Mac

* Any other OS (c(os) can also be "Unix") leaves ROOT empty because of the
* "macro drop _all" above; every path below would then silently start at "/".
* Stop here with a clear message instead of failing later on a missing file.
if ("$ROOT"=="") {
	di as error "ROOT is not defined for operating system `c(os)': add the Dropbox path for this OS in the WORKING DIRECTORIES section"
	exit 198
}

* Input data folders
global origdata 	"$ROOT/2-data/1-raw"
global cultdata		"$ROOT/2-data/0-cultural data/Data"

* Replication folders (numbered 1_data ... 5_figures)
global readydata  	"$ROOT/3-prog/CULTURE/replication/1_data"
global prog		 	"$ROOT/3-prog/CULTURE/replication/2_dofiles"
global shares		"$ROOT/3-prog/CULTURE/replication/3_shares"
global tables		"$ROOT/3-prog/CULTURE/replication/4_tables"
global figures		"$ROOT/3-prog/CULTURE/replication/5_figures"

********************************************************************************
*
*			DATA PREPARATION ** GHANA ** 							
*
********************************************************************************

* Special values for missing
* The same set of sentinel codes (runs of 9s, 7s and 8s, plus -99, -999 and
* 9999996) is stored in two spellings:
*   missvals        - space-separated list
*   missvals_inlist - comma-separated list, pasted as the tail of the argument
*                     list of inlist(var, ...) when such codes are recoded to
*                     Stata missing (.)
* Several codes are listed more than once in each macro; for inlist() this is
* redundant but harmless.
global missvals " 999 9999 99999  999999 9999996 9999999 99999999 999999999 9999999999  -99 -999  777 888 999 7777 8888 9999 77777 88888 99999 777777 888888 999999 7777777 8888888 9999999 77777777 88888888 99999999 777777777 888888888 999999999"
global missvals_inlist "999,9999,99999, 999999, 9999996,9999999,99999999, 999999999 ,9999999999, -99 ,-999 ,777,888,999,7777,8888,9999,77777,88888,99999,777777,888888,999999,7777777,8888888,9999999,77777777,88888888,99999999,777777777,888888888,999999999, 9999996"


* RECALL PERIODS
*******************
* Every macro defined here holds a text fragment of the form "*<factor>".
* It is pasted directly after an amount (e.g. amount$week1) to rescale the
* amount from its recall period to the reference period selected by $denom.
global denom "1" // 1 = amounts per year; set to 365 for amounts per day

* Hours (work hours in a year = 2080)
global hour1 "*2080/${denom}"

* Days: day`d' rescales an amount reported for a recall period of `d' days
* (each factor is also echoed to the log)
forvalues d = 1/1000 {
	global day`d' "*(365/`d')/${denom}"
	display "${day`d'}"
}

* Weeks and months: recall periods of 1 to 100 weeks / months
forvalues p = 1/100 {
	global week`p'  "*(365/(`p'*7))/${denom}"
	global month`p' "*(12/`p')/${denom}"
}
global week05 "*(365/3.5)/${denom}" // twice a week

* Years: recall periods of 1 to 10 years
forvalues y = 1/10 {
	global year`y' "*1/(`y'*${denom})"
}


* Cutoff for winsorizing extreme values of expenditure items
* (passed to the cuts() option of winsor2)
global expoutcutoff "0 99.99"

*------------------------------------------------------------------------------*
*
*					Ghana GLSS 2017
*
*------------------------------------------------------------------------------*
{
	********************************************************************************
	* Identifiers
	********************************************************************************
	* Household and regional identifiers.
	* dataset_hhid1 (cluster) + dataset_hhid2 (household number) are the keys
	* used to combine files later on.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec0.dta", clear
	foreach pair in "dataset_hhid1 clust" "dataset_hhid2 nh" "hhno nh" "hhid hid" "region3 region" {
		gettoken newv oldv : pair
		generate `newv' = `oldv'
	}
	decode region, generate(region3_name)
	generate region4 = district
	generate region5 = clust
	keep dataset_hhid1 dataset_hhid2 hhid hhno region3 region3_name region4 region5
	duplicates drop
	tempfile hhidvars
	save `hhidvars'

	* Individual identifiers: household keys plus the person number
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec1.dta", clear
	foreach pair in "dataset_hhid1 clust" "dataset_hhid2 nh" "dataset_pno1 pid" "pno pid" {
		gettoken newv oldv : pair
		generate `newv' = `oldv'
	}
	keep dataset_hhid1 dataset_hhid2 dataset_pno1 pno 
	duplicates drop
	tempfile pidvars
	save `pidvars'

	* Weight: sweight is a copy of WTA_S from the poverty file
	use  "$origdata/GHA_GLSS_2017/1.data/povgh_2017.dta", clear
	foreach pair in "dataset_hhid1 clust" "dataset_hhid2 nh" "sweight WTA_S" {
		gettoken newv oldv : pair
		generate `newv' = `oldv'
	}
	keep dataset_hhid1 dataset_hhid2 sweight
	tempfile weight
	save `weight'


	********************************************************************************
	* Household demographic characteristics
	********************************************************************************
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec1.dta", clear
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate dataset_pno1  = pid   //for combining

	* Basic demographics
	* Age in years; when years are missing or zero, use months/12 instead
	generate p_agey = s1q5y
	generate p_agem = s1q5m
	replace  p_agey = p_agem/12 if (missing(p_agey) | p_agey==0) & !missing(p_agem)

	* 0/1 indicators, left missing when the underlying answer is missing
	generate p_age40  = (p_agey > 40) if !missing(p_agey)
	generate p_female = (s1q2 == 2)   if !missing(s1q2)
	foreach pair in "head 1" "spouse 2" {
		gettoken role code : pair
		generate p_`role' = (s1q3 == `code') if !missing(s1q3)
	}

	* Age groups: child = age <= p_childage, adult = age > p_childage,
	* adult18 = age >= 18, adultchild = above p_childage but below 18
	generate p_childage   = 13
	generate p_child      = (p_agey <= p_childage)              if !missing(p_agey)

	generate p_adult      = (p_agey >  p_childage)              if !missing(p_agey)

	generate p_adult18    = (p_agey >= 18)                      if !missing(p_agey)

	generate p_adultchild = (p_agey > p_childage & p_agey < 18) if !missing(p_agey)


	* Ethnic group: value label of s1q13, lower-cased
	decode  s1q13, generate(p_ethn) 
	replace p_ethn = lower(p_ethn)

	* Marital status: one dummy per s1q6 code; missing when s1q6 is . or 9
	foreach pair in "married 1" "single 6" "divorced 4" "cohab 2" "widow 5" "separated 3" {
		gettoken status code : pair
		generate p_`status' = (s1q6 == `code') if !inlist(s1q6, ., 9)
	}

	keep dataset_* p_*
	tempfile indchars1
	save `indchars1'

	* Education
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec2.dta", clear
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate dataset_pno1  = pid   //for combining

	* No education: s2aq1 equals 2
	generate p_noeduc = (s2aq1 == 2) if !missing(s2aq1)
	* Tertiary education: s2aq1b is one of codes 9-12; persons flagged as having
	* no education get 0 when s2aq1b itself is missing
	generate p_eductert = inlist(s2aq1b, 9, 10, 11, 12) if !missing(s2aq1b)
	replace  p_eductert = 0 if p_noeduc == 1 & missing(p_eductert)

	keep dataset_* p_*
	duplicates drop
	tempfile indchars2
	save `indchars2'



	* Employment
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec4.dta", clear
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate dataset_pno1  = pid   //for combining

	** Employee and self-employment status.
	** For each status: two last-7-days indicators (s4aq36, s4bq6) and one
	** last-12-months indicator (s4dq5); the 7-day pair is collapsed with
	** rowmax, and the overall flag is the rowmax of 7-day and 12-month flags.
	local codes_employee 1, 2, 3
	local codes_selfempl 5, 6, 7, 8, 9, 10, 11, 12
	foreach grp in employee selfempl {
		generate p_`grp'7d1 = inlist(s4aq36, `codes_`grp'') if !missing(s4aq36)
		generate p_`grp'7d2 = inlist(s4bq6,  `codes_`grp'') if !missing(s4bq6)
		generate p_`grp'12m = inlist(s4dq5,  `codes_`grp'') if !missing(s4dq5)
		egen p_`grp'7d = rowmax(p_`grp'7d1 p_`grp'7d2)
		egen p_`grp'   = rowmax(p_`grp'7d p_`grp'12m)
		drop p_`grp'7d1 p_`grp'7d2
	}

	* Working population: employee or self-employed, by reference period
	egen p_working12m = rowmax(p_employee12m p_selfempl12m)
	egen p_working7d  = rowmax(p_employee7d p_selfempl7d)
	egen p_working    = rowmax(p_employee p_selfempl)


	* Wage
	* Five amount/unit pairs (<stub>a = amount, <stub>b = time-unit code).
	* Special missing codes in the amount are set to . first; the amount is then
	* rescaled with the recall-period macro matching its unit code:
	*   1 hour1, 2 day1, 3 week1, 4 week2, 5 month1, 6 month3, 7 month6, 8 year1
	* Amounts with any other (or missing) unit code are left unscaled.
	local k = 0
	foreach stub in s4aq45 s4aq47 s4aq49 s4bq8 s4bq10 {
		local ++k
		replace `stub'a = . if inlist(`stub'a, ${missvals_inlist})
		generate p_wage7d`k' = `stub'a
		local u = 0
		foreach per in hour1 day1 week1 week2 month1 month3 month6 year1 {
			local ++u
			replace p_wage7d`k' = p_wage7d`k'${`per'} if `stub'b == `u'
		}
	}

	* Total wage: sum of the five components, missing only when all are missing
	egen p_wage = rowtotal(p_wage7d*), missing
	drop p_wage7d*

	keep dataset_* p_*
	duplicates drop
	tempfile indchars3
	save `indchars3'

	* Income from business
	** revenues
	* Revenue component 1: s10d16b, rescaled by the unit code s10d1op taken from
	* the enterprise roster (g7sec10a) and summed per person s10aq3.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec10d_1ii.dta", clear
	merge m:1 clust nh s10aid  using "$origdata/GHA_GLSS_2017/1.data/g7sec10a.dta", keepusing(s10aq3 s10cq1 s10d1an s10d1op)
	keep if inlist(_merge, 1, 3) // discard roster rows without a record in this file
	drop _merge 
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate dataset_pno1  = s10aq3 // if s10aq3<99 //for combining
	replace s10d16b = . if inlist(s10d16b, ${missvals_inlist})
	generate aux = s10d16b
	* unit codes 1-6 -> day1, week1, week2, month1, month3, year1
	local code = 0
	foreach per in day1 week1 week2 month1 month3 year1 {
		local ++code
		replace aux = aux${`per'} if s10d1op == `code'
	}
	bysort dataset_hhid1 dataset_hhid2 dataset_pno1: egen p_busrev1 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_busrev1
	save `p_busrev1'
	summarize



	* Revenue component 2: s10d4q2d, rescaled by unit code s10d1op and summed
	* per person (person numbers of 99 and above are set to missing).
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec10d4.dta", clear
	merge m:1 clust nh s10aid  using "$origdata/GHA_GLSS_2017/1.data/g7sec10a.dta", keepusing(s10aq3 s10cq1 s10d1an s10d1op)
	keep if inlist(_merge, 1, 3) // discard roster rows without a record in this file
	drop _merge 
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate dataset_pno1  = s10aq3 if s10aq3<99  //for combining
	replace s10d4q2d = . if inlist(s10d4q2d, ${missvals_inlist})
	generate aux = s10d4q2d
	* unit codes 1-6 -> day1, week1, week2, month1, month3, year1
	local code = 0
	foreach per in day1 week1 week2 month1 month3 year1 {
		local ++code
		replace aux = aux${`per'} if s10d1op == `code'
	}
	bysort dataset_hhid1 dataset_hhid2 dataset_pno1: egen p_busrev2 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_busrev2
	save `p_busrev2'
	summarize


	* Revenue component 3: s10d5q1, rescaled by unit code s10d1op and summed
	* per person (person numbers of 99 and above are set to missing).
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec10d5.dta", clear
	merge m:1 clust nh s10aid  using "$origdata/GHA_GLSS_2017/1.data/g7sec10a.dta", keepusing(s10aq3 s10cq1 s10d1an s10d1op)
	keep if inlist(_merge, 1, 3) // discard roster rows without a record in this file
	drop _merge 
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate dataset_pno1  = s10aq3 if s10aq3<99  //for combining
	replace s10d5q1 = . if inlist(s10d5q1, ${missvals_inlist})
	generate aux = s10d5q1
	* unit codes 1-6 -> day1, week1, week2, month1, month3, year1
	local code = 0
	foreach per in day1 week1 week2 month1 month3 year1 {
		local ++code
		replace aux = aux${`per'} if s10d1op == `code'
	}
	bysort dataset_hhid1 dataset_hhid2 dataset_pno1: egen p_busrev3 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_busrev3
	save `p_busrev3'

	* Revenue component 4: s10d6q4_10, rescaled with the 7-day factor ($day7)
	* and summed per person (no restriction on the person number here).
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec10d6i.dta", clear
	merge m:1 clust nh s10aid  using "$origdata/GHA_GLSS_2017/1.data/g7sec10a.dta", keepusing(s10aq3 s10cq1 s10d1an s10d1op)
	keep if inlist(_merge, 1, 3) // discard roster rows without a record in this file
	drop _merge 
	generate dataset_hhid1 = clust  //for combining
	generate dataset_hhid2 = nh     //for combining
	generate dataset_pno1  = s10aq3 //for combining
	replace s10d6q4_10 = . if inlist(s10d6q4_10, ${missvals_inlist})
	generate aux = s10d6q4_10
	replace aux = aux${day7}
	bysort dataset_hhid1 dataset_hhid2 dataset_pno1: egen p_busrev4 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_busrev4
	save `p_busrev4'

	* Revenue component 5: row total of s10eq1_6, s10eq3_8 and s10eq5_10,
	* rescaled with the 2-week factor ($week2) and summed per person.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec10e.dta", clear
	merge m:1 clust nh s10aid  using "$origdata/GHA_GLSS_2017/1.data/g7sec10a.dta", keepusing(s10aq3 s10cq1 s10d1an s10d1op)
	keep if inlist(_merge, 1, 3) // discard roster rows without a record in this file
	drop _merge 
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate dataset_pno1  = s10aq3 if s10aq3<99  //for combining
	local amounts s10eq1_6 s10eq3_8 s10eq5_10
	foreach amt of local amounts {
		replace `amt' = . if inlist(`amt', ${missvals_inlist})
	}
	egen aux = rowtotal(`amounts'), missing
	replace aux = aux${week2}
	bysort dataset_hhid1 dataset_hhid2 dataset_pno1: egen p_busrev5 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_busrev5
	save `p_busrev5'


	* Revenue component 6: s10e1b, rescaled with the 3-month factor ($month3)
	* and summed per person.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec10e1.dta", clear
	merge m:1 clust nh s10aid  using "$origdata/GHA_GLSS_2017/1.data/g7sec10a.dta", keepusing(s10aq3 s10cq1 s10d1an s10d1op)
	keep if inlist(_merge, 1, 3) // discard roster rows without a record in this file
	drop _merge 
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate dataset_pno1  = s10aq3 if s10aq3<99  //for combining
	replace s10e1b = . if inlist(s10e1b, ${missvals_inlist})
	generate aux = s10e1b
	replace aux = aux${month3}
	bysort dataset_hhid1 dataset_hhid2 dataset_pno1: egen p_busrev6 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_busrev6
	save `p_busrev6'

	** costs
	* Cost component 1: row total of s10c1d and s10c1e, rescaled with the
	* 3-month factor ($month3) and summed per person.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec10c.dta", clear
	merge m:1 clust nh s10aid  using "$origdata/GHA_GLSS_2017/1.data/g7sec10a.dta", keepusing(s10aq3 s10cq1 s10d1an s10d1op)
	keep if inlist(_merge, 1, 3) // discard roster rows without a record in this file
	drop _merge 
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate dataset_pno1  = s10aq3 if s10aq3<99  //for combining
	local amounts s10c1d s10c1e
	foreach amt of local amounts {
		replace `amt' = . if inlist(`amt', ${missvals_inlist})
	}
	egen  aux = rowtotal(`amounts'), missing
	replace aux = aux${month3}
	bysort dataset_hhid1 dataset_hhid2 dataset_pno1: egen p_buscost1 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_buscost1
	save `p_buscost1'


	* Cost component 2: s10d1q4c, rescaled by unit code s10d1op and summed
	* per person.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec10d_1i.dta", clear
	merge m:1 clust nh s10aid  using "$origdata/GHA_GLSS_2017/1.data/g7sec10a.dta", keepusing(s10aq3 s10cq1 s10d1an s10d1op)
	keep if inlist(_merge, 1, 3) // discard roster rows without a record in this file
	drop _merge 
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate dataset_pno1  = s10aq3 if s10aq3<99  //for combining
	replace s10d1q4c = . if inlist(s10d1q4c, ${missvals_inlist})
	generate  aux = s10d1q4c
	* unit codes 1-6 -> day1, week1, week2, month1, month3, year1
	local code = 0
	foreach per in day1 week1 week2 month1 month3 year1 {
		local ++code
		replace aux = aux${`per'} if s10d1op == `code'
	}
	bysort dataset_hhid1 dataset_hhid2 dataset_pno1: egen p_buscost2 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_buscost2
	save `p_buscost2'


	* Cost component 3: product s10d4q1b*s10d4q1c, rescaled by unit code
	* s10d1op and summed per person.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec10d4.dta", clear
	merge m:1 clust nh s10aid  using "$origdata/GHA_GLSS_2017/1.data/g7sec10a.dta", keepusing(s10aq3 s10cq1 s10d1an s10d1op)
	keep if inlist(_merge, 1, 3) // discard roster rows without a record in this file
	drop _merge 
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate dataset_pno1  = s10aq3 if s10aq3<99  //for combining
	foreach factor in s10d4q1b s10d4q1c {
		replace `factor' = . if inlist(`factor', ${missvals_inlist})
	}
	generate  aux = s10d4q1b*s10d4q1c
	* unit codes 1-6 -> day1, week1, week2, month1, month3, year1
	local code = 0
	foreach per in day1 week1 week2 month1 month3 year1 {
		local ++code
		replace aux = aux${`per'} if s10d1op == `code'
	}
	bysort dataset_hhid1 dataset_hhid2 dataset_pno1: egen p_buscost3 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid1 dataset_hhid2 dataset_pno1

	tempfile p_buscost3
	save `p_buscost3'


	* Cost component 4: row total of s10d6q7_1 ... s10d6q7_4, rescaled with the
	* 7-day factor ($day7) and summed per person.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec10d6i.dta", clear
	merge m:1 clust nh s10aid  using "$origdata/GHA_GLSS_2017/1.data/g7sec10a.dta", keepusing(s10aq3 s10cq1 s10d1an s10d1op)
	keep if inlist(_merge, 1, 3) // discard roster rows without a record in this file
	drop _merge 
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate dataset_pno1  = s10aq3 if s10aq3<99  //for combining
	local amounts s10d6q7_1 s10d6q7_2 s10d6q7_3 s10d6q7_4
	foreach amt of local amounts {
		replace `amt' = . if inlist(`amt', ${missvals_inlist})
	}
	egen  aux = rowtotal(`amounts'), missing
	replace aux = aux${day7}
	bysort dataset_hhid1 dataset_hhid2 dataset_pno1: egen p_buscost4 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_buscost4
	save `p_buscost4'


	* Cost component 5: s10d6q6, rescaled with the 7-day factor ($day7) and
	* summed per person.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec10d6ii.dta", clear
	merge m:1 clust nh s10aid  using "$origdata/GHA_GLSS_2017/1.data/g7sec10a.dta", keepusing(s10aq3 s10cq1 s10d1an s10d1op)
	keep if inlist(_merge, 1, 3) // discard roster rows without a record in this file
	drop _merge 
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate dataset_pno1  = s10aq3 if s10aq3<99  //for combining
	replace s10d6q6 = . if inlist(s10d6q6, ${missvals_inlist})
	generate  aux = s10d6q6
	replace aux = aux${day7}
	bysort dataset_hhid1 dataset_hhid2 dataset_pno1: egen p_buscost5 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_buscost5
	save `p_buscost5'


	* Cost component 6: s10f7, rescaled with the 12-month factor ($month12)
	* and summed per person.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec10f.dta", clear
	merge m:1 clust nh s10aid  using "$origdata/GHA_GLSS_2017/1.data/g7sec10a.dta", keepusing(s10aq3 s10cq1 s10d1an s10d1op)
	keep if inlist(_merge, 1, 3) // discard roster rows without a record in this file
	drop _merge 
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate dataset_pno1  = s10aq3 if s10aq3<99  //for combining
	replace s10f7 = . if inlist(s10f7, ${missvals_inlist})
	generate  aux = s10f7
	replace aux = aux${month12}
	bysort dataset_hhid1 dataset_hhid2 dataset_pno1: egen p_buscost6 = sum(aux), missing
	keep dataset_hhid* dataset_pno* p_*
	duplicates drop
	duplicates report dataset_hhid* dataset_pno*
	tempfile p_buscost6
	save `p_buscost6'


	* Combine all individual-level data
	* The person list is the master file: after every merge, rows that exist
	* only in the merged-in file are discarded, so no person is ever added.
	use `pidvars', clear

	* Household identifiers (one household row matched to many persons)
	merge m:1 dataset_hhid* using `hhidvars'
	keep if inlist(_merge, 1, 3)
	drop _merge

	* Person characteristics: demographics, education, employment/wage
	foreach part in indchars1 indchars2 indchars3 {
		merge 1:1 dataset_hhid* dataset_pno* using ``part''
		keep if inlist(_merge, 1, 3)
		drop _merge
	}

	* Business revenue and cost components 1-6 (revenue n, then cost n)
	forvalues n = 1/6 {
		foreach side in busrev buscost {
			merge 1:1 dataset_hhid* dataset_pno* using `p_`side'`n''
			keep if inlist(_merge, 1, 3)
			drop _merge
		}
	}


	* Individual income from non-agri business activities
	* Totals over the numbered components; missing only if every component is missing
	egen p_busrev  = rowtotal(p_busrev?), missing
	egen p_buscost = rowtotal(p_buscost?), missing
	drop p_busrev? p_buscost?

	* Net business income: revenue minus cost where a cost is available,
	* revenue alone otherwise; negative values are set to zero
	generate p_businc = cond(missing(p_buscost), p_busrev, p_busrev - p_buscost)
	replace  p_businc = 0 if p_businc < 0
	drop p_busrev p_buscost

	* Total individual income = wage + business income; persons flagged as not
	* working get 0 when both parts are missing
	egen p_inc = rowtotal(p_wage p_businc), missing
	replace p_inc = 0 if p_working == 0 & missing(p_inc)


	* Converting individual variables to HH-level
	* Household size
	bysort hhid: egen hh_size = count(pno)

	* Number of adults and children
	foreach grp in adult child adultchild {
		bysort hhid: egen hh_n`grp' = sum(p_`grp'), missing
	}

	* Number of men, women, boys and girls
	* spec = <suffix of hh_n*> <age group flag> <value of p_female>
	foreach spec in "male adult 0" "female adult 1" "boy child 0" "girl child 1" {
		tokenize `spec'
		capture drop aux
		generate aux = (p_`2'==1 & p_female==`3') if p_`2'<. & p_female<.
		bysort hhid: egen hh_n`1' = sum(aux), missing
	}

	* Share of boys among children
	generate hh_pboy = hh_nboy/hh_nchild
	*replace hh_pboy=0 if hh_nchild==0

	* Share of women among adults
	generate hh_pfemale = hh_nfemale/hh_nadult
	*replace hh_pfemale=0 if hh_nadult==0 

	* Mean/maximum/minimum age by demographic groups
	* a = adults, f = adult women, m = adult men,
	* c = children, g = girls, b = boys
	local who_a p_adult==1
	local who_f p_adult==1 & p_female==1
	local who_m p_adult==1 & p_female==0
	local who_c p_child==1
	local who_g p_child==1 & p_female==1
	local who_b p_child==1 & p_female==0
	foreach grp in a f m c g b {
		capture drop aux
		generate aux = p_agey if `who_`grp''
		foreach stat in mean max min {
			bysort hhid: egen hh_`stat'age_`grp' = `stat'(aux)
		}
	}

	* Age of youngest member/reported child in HH
	bysort hhid: egen hh_minage = min(p_agey)

	* Child age used now
	bysort hhid: egen hh_childage = max(p_childage)

	* Education by adult demographic groups
	* Share of women/men with no education and tertiary education among adults in HH
	* spec = <suffix> <value of p_female> <denominator>
	foreach trait in noeduc eductert {
		foreach spec in "f 1 hh_nfemale" "m 0 hh_nmale" {
			tokenize `spec'
			capture drop aux
			capture drop auxsum
			generate aux = p_`trait' if p_female==`2' & p_adult==1
			bysort hhid: egen auxsum = sum(aux), missing
			generate hh_p`trait'_`1' = auxsum/`3'
		}
	}


	* Employment by adult demographic groups
	* Share of women/men with employment among adults in HH
	* spec = <suffix> <denominator> <selection condition, written without spaces>
	foreach trait in employee selfempl working {
		foreach spec in "a hh_nadult p_adult==1" "f hh_nfemale p_female==1&p_adult==1" "m hh_nmale p_female==0&p_adult==1" {
			tokenize `spec'
			capture drop aux
			capture drop auxsum
			generate aux = p_`trait' if `3'
			bysort hhid: egen auxsum = sum(aux), missing
			generate hh_p`trait'_`1' = auxsum/`2'
		}
	}

	* Household head
	* For each listed person-level variable p_x, hh_x_h is the maximum of p_x over
	* the rows flagged p_head==1 within the household, copied to every member.
	foreach x in agey age40 female noeduc eductert married single divorced cohab widow separated {
		cap drop aux
		gen aux = p_`x' if p_head==1
		bys hhid: egen hh_`x'_h = max(aux)
			
	}
	* Ethnicity of the head (string). Rows of non-heads start out empty.
	gen hh_ethn_h=p_ethn if p_head==1
	* Sorting within household by hh_ethn_h puts empty strings first, so the last
	* row ([_N]) carries a non-empty value whenever one exists; copy it to the
	* empty rows.
	bys hhid (hh_ethn_h): replace hh_ethn_h = hh_ethn_h[_N] if missing(hh_ethn_h)
	** if HH head didn't report ethnicity/language, take any non-missing value within HH (very few cases)
	* aux = own ethnicity, or, when empty, the household's last value in sort order
	cap drop aux 
	gen aux = p_ethn
	bys hhid (aux): replace aux = aux[_N] if missing(aux)
	* Head rows take aux (own ethnicity if reported, otherwise the fallback) ...
	bys hhid (aux): replace hh_ethn_h=aux if p_head==1 
	* ... and the result is spread again to rows that are still empty.
	bys hhid (hh_ethn_h): replace hh_ethn_h = hh_ethn_h[_N] if missing(hh_ethn_h)
	drop aux
			
	* Sum of individual incomes
	foreach x in wage inc {
		* HH income: total over all members (missing if no member has a value)
		bysort hhid: egen hh_`x' = sum(p_`x') , missing

		* Income of adult women (_f) and of adult men (_m)
		foreach pair in "f 1" "m 0" {
			gettoken sfx fem : pair
			capture drop aux
			generate aux = p_`x' if p_female==`fem' & p_adult==1
			bysort hhid: egen hh_`x'_`sfx' = sum(aux) , missing
			drop aux
		}
	}

	* Adult equivalent children in HH: sum of adult equivalent children in the hh ; we should have nae<nchild
	* Weight by age bracket, multiplied by the child flag; from age 11 the
	* weight also depends on sex.
	generate p_ae = 0.68*p_child if inrange(p_agey,0,5) 
	replace  p_ae = 0.71*p_child if inrange(p_agey,6,10) 
	* spec = <weight> <min age> <max age> <value of p_female> (0 = boys, 1 = girls)
	foreach spec in "0.91 11 15 0" "1.07 16 17 0" "0.88 11 15 1" "0.83 16 17 1" {
		tokenize `spec'
		replace p_ae = `1'*p_child if inrange(p_agey,`2',`3') & p_female==`4'
	}

	* Persons not flagged as children always count zero
	replace p_ae = 0 if p_child==0

	bysort hhid: egen hh_nae = sum(p_ae), missing // ae = 


	** CHECKS REPORTING OF AGE, GENDER AND RELATIONSHIP TO HH HEAD:
	* Household-level flags for: no HH head reported, any missing age, any
	* missing gender. This section only builds the flags.

	* HH head not reported: the largest p_head within the household is 0 or missing
	capture drop aux
	bysort hhid: egen aux = max(p_head)
	generate nohead = inlist(aux, 0, .)
	drop aux

	* Any missing in age (agemiss) / any missing in gender (gendermiss)
	foreach pair in "agemiss p_agey" "gendermiss p_female" {
		gettoken flag src : pair
		generate aux = missing(`src')
		bysort hhid: egen `flag' = max(aux)
		drop aux
	}

	* Drop the person-level variables and reduce to distinct household rows
	drop pno p_* dataset_pno*
	duplicates drop

	tempfile hhdemvars
	save `hhdemvars'

	********************************************************************************
	* Household-level characteristics
	********************************************************************************
	* Three 0/1 household dummies, each equal to 1 when the source answer is 1
	* and missing when the answer is missing:
	*   hhvars1: urban dummy      (urbrur in g7sec0)
	*   hhvars2: house ownership  (s7bq1  in g7sec7)
	*   hhvars3: land ownership   (s8aq1  in g7sec8a1)
	* spec = <file> <source variable> <new variable>
	local k = 0
	foreach spec in "g7sec0 urbrur hh_urban" "g7sec7 s7bq1 hh_homeown" "g7sec8a1 s8aq1 hh_landown" {
		local ++k
		tokenize `spec'
		use  "$origdata/GHA_GLSS_2017/1.data/`1'.dta", clear
		generate dataset_hhid1 = clust //for combining
		generate dataset_hhid2 = nh    //for combining
		generate `3' = (`2' == 1) if !missing(`2')
		keep dataset_hhid* hh_*
		duplicates drop
		tempfile hhvars`k'
		save `hhvars`k''
	}
				
	* Household -level incomes
	** agricultural income
	* Six files are processed the same way: special missing codes in the listed
	* amounts are set to ., the amounts are added up per row (missing when all
	* are missing), rescaled with the 12-month factor ($month12) and summed per
	* household. Each result is saved in a tempfile named after the new variable.
	local agfile_1 g7sec8a2
	local agvars_1 s8aq25 s8aq30
	local agout_1  hh_hhagrev1

	local agfile_2 g7sec8a3
	local agvars_2 s8aq36 s8aq38
	local agout_2  hh_hhagrev2

	local agfile_3 g7sec8c1
	local agvars_3 s8c1q8 s8c1q11
	local agout_3  hh_hhagrev3

	local agfile_4 g7sec8c2
	local agvars_4 s8c2q26
	local agout_4  hh_hhagrev4

	local agfile_5 g7sec8e
	local agvars_5 s8eq1 s8eq2 s8eq3 s8eq4 s8eq5 s8eq6 s8eq7 s8eq8 s8eq9 s8eq10
	local agout_5  hh_hhagrev5

	local agfile_6 g7sec8f
	local agvars_6 s8fq2
	local agout_6  hh_hhagcost

	forvalues n = 1/6 {
		local out `agout_`n''
		use  "$origdata/GHA_GLSS_2017/1.data/`agfile_`n''.dta", clear
		generate dataset_hhid1 = clust //for combining
		generate dataset_hhid2 = nh    //for combining
		foreach amt of local agvars_`n' {
			replace `amt' = . if inlist(`amt', ${missvals_inlist})
		}
		* with a single amount the row total is simply that amount
		egen aux = rowtotal(`agvars_`n''), missing
		replace aux = aux${month12}
		bysort dataset_hhid1 dataset_hhid2: egen `out' = sum(aux), missing
		keep dataset_* hh_*
		duplicates drop 
		tempfile `out'
		save ``out''
	}

	** other income
	* Component 1: row total of s11bq9, s11bq12 and s11bq13, rescaled with the
	* 12-month factor and summed per household.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec11b.dta", clear
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	local amounts s11bq9 s11bq12 s11bq13
	foreach amt of local amounts {
		replace `amt' = . if inlist(`amt', ${missvals_inlist})
	}
	egen aux = rowtotal(`amounts'), missing
	replace aux = aux${month12}
	bysort dataset_hhid1 dataset_hhid2: egen hh_hhoinc1 = sum(aux), missing
	keep dataset_* hh_*
	duplicates drop 
	tempfile hh_hhoinc1
	save `hh_hhoinc1'

	* Component 2: six products of paired variables (aux1-aux6), added up per
	* row, rescaled with the 12-month factor and summed per household.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec11c.dta", clear
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	local k = 0
	foreach pair in "s11cq3 s11cq4" "s11cq7 s11cq8" "s11cq11 s11cq12" ///
			"s11cq15 s11cq16" "s11cq18 s11cq19" "s11cq22 s11cq23" {
		local ++k
		gettoken first second : pair
		replace `first'  = . if inlist(`first', ${missvals_inlist})
		replace `second' = . if inlist(`second', ${missvals_inlist})
		generate aux`k' = `first'*`second'
	}
	egen aux = rowtotal(aux1 aux2 aux3 aux4 aux5 aux6), missing
	replace aux = aux${month12}
	bysort dataset_hhid1 dataset_hhid2: egen hh_hhoinc2 = sum(aux), missing
	keep dataset_* hh_*
	duplicates drop 
	tempfile hh_hhoinc2
	save `hh_hhoinc2'
	 


	********************************************************************************
	* Consumption expenditure
	********************************************************************************

	* Clothing
	**************
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec9a.dta", clear
	generate dataset_hhid1 = clust //for combining
	generate dataset_hhid2 = nh    //for combining
	generate prcodevar = lfreqcd
	foreach var in s9aq2 s9aq4  {
		replace `var' = . if inlist(`var', ${missvals_inlist})
	}

	* Item codes (prcodevar) assigned to men (m), women (f) and children (c)
	local items_m 25, 28, 58, 59, 60, 61, 62, 63, 64, 65, 66, 68, 71, 73, 76, 79, 83, 87, 96, 97, 98, 99, 136, 137, 138, 139, 140, 154
	local items_f 26, 29, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 69, 72, 74, 75, 80, 84, 88, 94, 95, 142, 143, 144, 145, 146, 155
	local items_c 48, 49, 50, 51, 52, 53, 54, 55, 56, 77, 81, 85, 89, 148, 149, 150, 151, 152, 156

	* For each group: rescale both amount variables with the 12-month factor on
	* the group's items, winsorize each at $expoutcutoff (values are capped, not
	* dropped: no trim option is passed), add them up and sum per household.
	foreach grp in m f c {
		capture drop aux
		capture drop touse
		generate touse = inlist(prcodevar, `items_`grp'')
		local i = 1
		foreach var in s9aq2 s9aq4 {
			capture drop aux`i'
			generate aux`i' = `var'$month12 if touse==1
			winsor2 aux`i', replace cuts($expoutcutoff)
			local i = `i' + 1
		}
		egen aux = rowtotal(aux1 aux2), missing
		bysort dataset_hhid1 dataset_hhid2: egen hh_cloth_`grp' = sum(aux)
	}

	keep dataset_* hh_*
	duplicates drop

	tempfile hh_cloth
	save `hh_cloth'



	* Food
	*********
	* Part 1 (hh_food1): file g7sec9b, items with freqcd<=647 or in 907-921.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec9b.dta", clear
	gen dataset_hhid1 = clust //for combining
	gen dataset_hhid2 = nh //for combining
	local foodvars s9bq1a s9bq2a s9bq3a s9bq4a s9bq5a s9bq6a
	foreach v of local foodvars {
		replace `v' = . if inlist(`v', ${missvals_inlist})==1
	}
	cap drop aux
	cap drop touse
	* 1 for the food item codes, 0 otherwise
	gen touse = (freqcd<=647 | (freqcd>=907 & freqcd<=921))
	* one rescaled (global day30) and winsorised component per amount field
	local k = 0
	foreach v of local foodvars {
		local ++k
		cap drop aux`k'
		gen aux`k' = `v'$day30 if touse==1
		winsor2 aux`k', replace cuts($expoutcutoff)
	}
	egen aux = rowtotal(aux?), missing
	bys dataset_hhid1 dataset_hhid2: egen hh_food1 = sum(aux)
	keep dataset_* hh_*
	duplicates drop
	tempfile hh_food1
	save `hh_food1'


	* Part 2 (hh_food2): file g7sec8h, value = p-field x q-field x s8hq2
	* for each of the questions 3 to 8, rescaled with global month12.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec8h.dta", clear
	gen dataset_hhid1 = clust //for combining
	gen dataset_hhid2 = nh //for combining
	foreach sfx in p q {
		forval z = 3/8 {
			replace s8hq`z'`sfx' = . if inlist(s8hq`z'`sfx', ${missvals_inlist})==1
		}
	}
	replace s8hq2 = . if inlist(s8hq2, ${missvals_inlist})==1
	cap drop aux
	cap drop touse
	gen touse = 0
	forval z = 3/8 {
		local k = `z' - 2	// components are numbered aux1..aux6
		cap drop aux`k'
		gen aux`k' = s8hq`z'p*s8hq`z'q*s8hq2$month12
		winsor2 aux`k', replace cuts($expoutcutoff)
	}
	egen aux = rowtotal(aux?), missing
	bys dataset_hhid1 dataset_hhid2: egen hh_food2 = sum(aux)
	keep dataset_* hh_*
	duplicates drop
	tempfile hh_food2
	save `hh_food2'


	* Non-food public 
	********************
	* Part 1 (hh_nfoodpub1): file g7sec9a, selected lfreqcd codes.
	* Amounts are winsorised first and rescaled with global month12 afterwards.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec9a.dta", clear
	gen dataset_hhid1 = clust //for combining
	gen dataset_hhid2 = nh //for combining
	gen prcodevar = lfreqcd
	local amts s9aq2 s9aq4
	foreach v of local amts {
		replace `v' = . if inlist(`v', ${missvals_inlist})==1
	}
	cap drop aux
	cap drop touse
	gen touse = 0
	* same codes as an explicit list, written as ranges
	foreach code of numlist 168 170/232 255/267 303/305 307/318 329/331 333 334 336 ///
							338/346 517/519 522 523 525/527 533 534 627 {
		replace touse = 1 if prcodevar==`code'
	}
	local k = 0
	foreach v of local amts {
		local ++k
		cap drop aux`k'
		gen aux`k' = `v'  if touse==1
		winsor2 aux`k', replace cuts($expoutcutoff)
	}
	egen aux = rowtotal(aux?), missing
	replace aux = aux$month12
	bys dataset_hhid1 dataset_hhid2: egen hh_nfoodpub1 = sum(aux)
	keep dataset_* hh_*
	duplicates drop
	tempfile hh_nfoodpub1
	save `hh_nfoodpub1'


	* Part 2 (hh_nfoodpub2): file g7sec9b, selected freqcd codes,
	* rescaled with global day30 after winsorising.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec9b.dta", clear
	gen dataset_hhid1 = clust //for combining
	gen dataset_hhid2 = nh //for combining
	gen prcodevar = freqcd
	local amts s9bq1a s9bq2a s9bq3a s9bq4a s9bq5a s9bq6a
	foreach v of local amts {
		replace `v' = . if inlist(`v', ${missvals_inlist})==1
	}
	cap drop aux
	cap drop touse
	gen touse = 0
	foreach code of numlist 651/653 655 657 658 660/663 665/670 675/681 687/693 ///
							697/699 703/706 708 855 905 906 {
		replace touse = 1 if prcodevar==`code'
	}
	local k = 0
	foreach v of local amts {
		local ++k
		cap drop aux`k'
		gen aux`k' = `v'  if touse==1
		winsor2 aux`k', replace cuts($expoutcutoff)
	}
	egen aux = rowtotal(aux?), missing
	replace aux = aux$day30
	bys dataset_hhid1 dataset_hhid2: egen hh_nfoodpub2 = sum(aux)
	keep dataset_* hh_*
	duplicates drop
	tempfile hh_nfoodpub2
	save `hh_nfoodpub2'


	* Non-food private 
	********************
	** including health and education expenses
	* Part 1 (hh_nfoodpriv1): file g7sec9a, selected lfreqcd codes; amounts are
	* winsorised, summed per item row, rescaled with global month12 and summed
	* per household.
	* NOTE(review): code 415 is listed twice below (harmless, but a neighbouring
	*   code such as 416 may have been intended) -- confirm against the code book.
	* NOTE(review): codes 533 and 534 are also in the non-food public list of this
	*   file, so those items enter both hh_nfoodpub1 and hh_nfoodpriv1 -- confirm.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec9a.dta", clear
	gen dataset_hhid1 = clust //for combining
	gen dataset_hhid2 = nh //for combining
	gen prcodevar = lfreqcd
	foreach var in s9aq2 s9aq4    {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	cap drop aux
	cap drop touse
	gen touse=0
	foreach x in 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 ///
				 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 ///
				 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 ///
				 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 ///
				 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 ///
				 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 ///
				 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 ///
				 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 ///
				 165 349 350 351 352 354 355 356 358 359 360 361 362 365 366 367 369 ///
				 371 372 373 374 375 376 377 401 402 403 405 406 407 408 409 410 411 ///
				 412 413 415 415 418 419 420 421 422 425 426 427 428 429 431 432 434 ///
				 435 454 496 497 498 500 501 502 510 512 513 514 529 530 531 532 533 ///
				 534 535 536 537 538 539 540 541 543 544 545 546 551 553 554 556 557 ///
				 558 559 561 562 563 565 566 567 569 570 571 572 574 575 576 577 578 ///
				 579 580 581 582 584 585 587 589 590 591 592 593 594 595 598 599 611 ///
				 612 613 615 616 618 619 620 622 623 625 629 631 634 635 637 638 639 640  {
		replace touse = 1 if prcodevar==`x'
	} 
	local i=1
	foreach var in s9aq2 s9aq4 {
		cap drop aux`i'
		gen aux`i'=`var'  if touse==1
		winsor2 aux`i', replace cuts($expoutcutoff) // cut-offs come from global expoutcutoff
		local i=`i'+1
	}
	egen aux = rowtotal(aux?), missing
	replace aux = aux$month12
	bys dataset_hhid1 dataset_hhid2: egen hh_nfoodpriv1= sum(aux)
	keep dataset_* hh_*
	duplicates drop
	tempfile hh_nfoodpriv1
	save `hh_nfoodpriv1'


	* Part 2 (hh_nfoodpriv2): file g7sec9b, selected freqcd codes; same steps,
	* rescaled with global day30.
	* FIX: the list used to contain 8889 between 888 and 890; every other code is
	*   three digits, so 8889 could never match and item 889 was silently left
	*   out. It now reads 889.
	* NOTE(review): code 846 is listed twice (harmless, but 847 may have been
	*   intended) -- confirm against the code book.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec9b.dta", clear
	gen dataset_hhid1 = clust //for combining
	gen dataset_hhid2 = nh //for combining
	gen prcodevar = freqcd
	foreach var in s9bq1a s9bq2a s9bq3a s9bq4a s9bq5a s9bq6a   {
		replace `var'=. if inlist(`var', ${missvals_inlist})==1
	}
	cap drop aux
	cap drop touse
	gen touse=0
	foreach x in 710 711 712 713 714 715 716 717 718 719 722 723 724 725 800 801 802 ///
				 803 807 808 809 810 820 821 822 823 825 826 827 829 830 831 832 833 ///
				 835 836 837 838 842 844 845 846 846 848 849 850 851 853 854 866 868 ///
				 869 870 871 872 873 874 875 876 877 880 881 882 885 886 887 888 889 ///
				 890 891 892 893 894 895 896 897 898 899 900 901 902 903 923 925 926 ///
				 927 928 929 935 936 937 938 939 940 941 942 943 944 945 949 950 951 ///
				 952 960 961 962 963 964 968 969 970 971 975 976 977 980 981 982 983 ///
				 984 985 986 990 {
		replace touse = 1 if prcodevar==`x'
	} 
	local i=1
	foreach var in s9bq1a s9bq2a s9bq3a s9bq4a s9bq5a s9bq6a {
		cap drop aux`i'
		gen aux`i'=`var'  if touse==1
		winsor2 aux`i', replace cuts($expoutcutoff) // cut-offs come from global expoutcutoff
		local i=`i'+1
	}
	egen aux = rowtotal(aux?), missing
	replace aux = aux$day30
	bys dataset_hhid1 dataset_hhid2: egen hh_nfoodpriv2= sum(aux)
	keep dataset_* hh_*
	duplicates drop
	tempfile hh_nfoodpriv2
	save `hh_nfoodpriv2'

	* Rent and utilities
	**********************
	* Builds hh_anyrent, hh_rent, hh_water, hh_electr and hh_outils from g7sec7.
	* Every amount is cleaned of special missing codes, winsorised with the
	* cut-offs in global expoutcutoff, and rescaled according to its unit
	* variable: unit codes 1..6 use the globals day1, week1, month1, month3,
	* month6 and year1, in that order. Rows with any other unit code keep the
	* unscaled amount.
	use  "$origdata/GHA_GLSS_2017/1.data/g7sec7.dta", clear
	gen dataset_hhid1 = clust //for combining
	gen dataset_hhid2 = nh //for combining

	** any rent dummy
	gen hh_anyrent = s7bq1==2 if s7bq1<.

	local periods day1 week1 month1 month3 month6 year1

	** rent amount (three amount/unit pairs)
	local amounts s7cq1a s7cq3a s7cq6a
	local units   s7cq1b s7cq3b s7cq6b
	forval j = 1/3 {
		local amt  : word `j' of `amounts'
		local unit : word `j' of `units'
		replace `amt' = . if inlist(`amt', ${missvals_inlist})==1
		gen hh_rent`j' = `amt'
		winsor2 hh_rent`j', replace cuts($expoutcutoff)
		local code = 0
		foreach p of local periods {
			local ++code
			replace hh_rent`j' = hh_rent`j'${`p'} if `unit'==`code'
		}
	}
	egen hh_rent = rowtotal(hh_rent?), missing
	drop hh_rent?

	** water (one amount with a unit, one amount always rescaled with global week2)
	foreach v in s7dq7a s7dq8 {
		replace `v' = . if inlist(`v', ${missvals_inlist})==1
	}
	gen hh_water1 = s7dq7a
	winsor2 hh_water1, replace cuts($expoutcutoff)
	local code = 0
	foreach p of local periods {
		local ++code
		replace hh_water1 = hh_water1${`p'} if s7dq7b==`code'
	}
	gen hh_water2 = s7dq8
	winsor2 hh_water2, replace cuts($expoutcutoff)
	replace hh_water2 = hh_water2$week2 
	egen hh_water = rowtotal(hh_water?), missing
	drop hh_water?

	** electricity
	replace s7dq18a = . if inlist(s7dq18a, ${missvals_inlist})==1
	gen hh_electr = s7dq18a
	winsor2 hh_electr, replace cuts($expoutcutoff)
	local code = 0
	foreach p of local periods {
		local ++code
		replace hh_electr = hh_electr${`p'} if s7dq18b==`code'
	}

	** other utilities (three amount/unit pairs)
	local amounts s7dq25a s7dq27a1 s7dq27b1
	local units   s7dq25b s7dq27a2 s7dq27b2
	forval j = 1/3 {
		local amt  : word `j' of `amounts'
		local unit : word `j' of `units'
		replace `amt' = . if inlist(`amt', ${missvals_inlist})==1
		gen hh_outils`j' = `amt'
		winsor2 hh_outils`j', replace cuts($expoutcutoff)
		local code = 0
		foreach p of local periods {
			local ++code
			replace hh_outils`j' = hh_outils`j'${`p'} if `unit'==`code'
		}
	}
	egen hh_outils = rowtotal(hh_outils?), missing
	drop hh_outils?

	keep dataset_* hh_*
	duplicates drop 
	tempfile hh_rentutils
	save `hh_rentutils'


	* World Bank aggregate expenditure variables (for poverty calculations)
	*************************************************************************
	* Only the identifiers and HHEXP_R are needed, so only those are loaded.
	use clust nh HHEXP_R using "$origdata/GHA_GLSS_2017/1.data/povgh_2017.dta", clear
	gen dataset_hhid1 = clust //for combining
	gen dataset_hhid2 = nh //for combining
	gen rexp_wb = HHEXP_R
	keep dataset_hhid* rexp_wb
	tempfile rexp_wb
	save `rexp_wb'



	********************************************************************************
	* Combine all survey-based variables
	********************************************************************************
	* Start from the household identifier file and attach every household-level
	* tempfile built above. Households found only in a using file are not kept.
	use `hhidvars', clear
	local pieces weight hhdemvars hhvars1 hhvars2 hhvars3 hh_hhagrev1 hh_hhagrev2 hh_hhagrev3 hh_hhagrev4 hh_hhagrev5 ///
				 hh_hhagcost hh_hhoinc1 hh_hhoinc2 hh_cloth hh_food1 hh_food2 hh_nfoodpub1 hh_nfoodpub2 ///
				 hh_nfoodpriv1 hh_nfoodpriv2 hh_rentutils rexp_wb
	foreach piece of local pieces {
		merge 1:1 dataset_hhid1 dataset_hhid2 using ``piece'', keep(master match) nogen
	}

	* Income from agriculture: revenue minus cost (when cost is reported), floored at zero
	egen hh_hhagrev = rowtotal(hh_hhagrev?), missing
	drop hh_hhagrev?
	gen hh_hhaginc = hh_hhagrev
	replace hh_hhaginc = hh_hhaginc - hh_hhagcost if hh_hhagcost<.
	replace hh_hhaginc = 0 if hh_hhaginc<0
	drop hh_hhagcost hh_hhagrev

	* Other income
	egen hh_hhoinc = rowtotal(hh_hhoinc?), missing
	drop hh_hhoinc?

	* Total HH income: household-level (non-individualised) income plus hh_inc
	egen hh_hhinc = rowtotal(hh_hhaginc hh_hhoinc), missing
	egen hh_totinc = rowtotal(hh_inc hh_hhinc), missing
	drop hh_hhoinc hh_hhaginc hh_hhinc

	* Relative income of women
	gen hh_relinc_f = hh_inc_f/hh_totinc

	* Private expenditure (no -missing- option: an all-missing row gives 0)
	egen hh_food = rowtotal(hh_food?), missing
	drop hh_food?
	egen hh_privexp = rowtotal(hh_food hh_nfoodpriv?)
	drop hh_nfoodpriv?

	* Public expenditure (no -missing- option: an all-missing row gives 0)
	egen hh_pubexp = rowtotal(hh_nfoodpub? hh_rent hh_water hh_electr hh_outils)
	drop hh_nfoodpub?
	drop hh_rent hh_water hh_electr hh_outils

	* Total expenditure
	egen hh_exp = rowtotal(hh_privexp hh_pubexp), missing

	********************************************************************************
	* Corrections
	********************************************************************************
	* Employment
	* A share that is missing because the household has nobody in the
	* reference group (adults, women, men) is set to zero.
	foreach x in pworking pemployee pselfempl {
		foreach pair in "a nadult" "f nfemale" "m nmale" {
			local sfx : word 1 of `pair'
			local cnt : word 2 of `pair'
			replace hh_`x'_`sfx' = 0 if hh_`cnt'==0 & hh_`x'_`sfx'>=.
		}
	}


	* Demographics:
	* - mean/max/min age is zero if the household has no member in the group
	foreach x in mean max min {
		foreach pair in "a nadult" "f nfemale" "m nmale" "c nchild" "b nboy" "g ngirl" {
			local sfx : word 1 of `pair'
			local cnt : word 2 of `pair'
			replace hh_`x'age_`sfx' = 0 if hh_`cnt'==0 & hh_`x'age_`sfx'>=.
		}
	}


	* Share of boys and women
	replace hh_pboy = 0 if hh_nchild==0 & hh_pboy>=.
	replace hh_pfemale = 0 if hh_nadult==0 & hh_pfemale>=.

	* Education
	* - education shares are zero if there are no women / no men
	foreach x in pnoeduc peductert {
		foreach pair in "f nfemale" "m nmale" {
			local sfx : word 1 of `pair'
			local cnt : word 2 of `pair'
			replace hh_`x'_`sfx' = 0 if hh_`cnt'==0 & hh_`x'_`sfx'>=.
		}
	}


	* Income variables:
	* - if nobody works, a missing total income is zero
	replace hh_totinc = 0 if hh_pworking_a==0 & hh_totinc>=.

	* - a missing women's income / relative income is zero when no woman works,
	*   the household has no women, or total income is zero
	replace hh_inc_f = 0 if (hh_pworking_f==0 | hh_nfemale==0 | hh_totinc==0) & hh_inc_f>=.
	replace hh_relinc_f = 0 if (hh_pworking_f==0 | hh_nfemale==0 | hh_totinc==0) & hh_relinc_f>=.

	* Clothing expenditures (if only purchased items were recorded)
	foreach g in f m c {
		replace hh_cloth_`g' = 0 if hh_cloth_`g'>=.
	}
	
	tempfile gha_glss
	save `gha_glss'
}

*------------------------------------------------------------------------------*
*
* 							Cultural data 
*
*------------------------------------------------------------------------------*
use `gha_glss', clear

*************************************************************************
* Individual matching 
*************************************************************************
* Prepare the Atlas dataset (the survey data in memory is preserved and restored)
preserve
	use "$cultdata/ethnographic_atlas_fixed.dta", clear  // Dataset from Ashraf_Bau_Nunn_Voena_JPE_2020_Replication_Files
	* the row number is the key used for the ethnic-group match
	gen ethn_match = _n
	** cultural indicators, based on Alesina et al 2021
	** v11 codes 0 and 9, and v43 code 0, are left missing
	** NOTE(review): a system-missing v11 or v43 passes these filters and is coded 0 -- confirm this is intended
	gen patrilocal  = (v11==1) if !inlist(v11, 0, 9)
	gen neolocal    = (v11==2) if !inlist(v11, 0, 9)
	gen matrilocal  = (v11==3) if !inlist(v11, 0, 9)
	gen matrilineal = (v43==3) if v43!=0
	gen patrilineal = (v43==1) if v43!=0
	rename (v104 v106) (lat lon)
	global eth_vars "patrilocal neolocal matrilocal matrilineal patrilineal"
	* lat and lon are not kept
	keep ethn_match v107 $eth_vars
	tempfile atlas
	save `atlas'
restore

* Map the household head's ethnic group (string hh_ethn_h) to the row number
* of the corresponding society in the Atlas file (ethn_match), then attach the
* Atlas variables.
* FIX: four long category names used to be selected through the positional
*   dummies created by -tabulate, generate()- (aux_4, aux_25, aux_47, aux_56).
*   That numbering depends on which categories happen to be present in the
*   data, so any change in the sample silently re-pointed the mapping. They are
*   now selected by the beginning of the category text quoted in the original
*   comments.
* NOTE(review): confirm the four prefixes against the actual values of hh_ethn_h.
tab hh_ethn_h
g ethn_match=64 if hh_ethn_h=="agona"
replace ethn_match=64 if hh_ethn_h=="ahafo"
replace ethn_match=124 if hh_ethn_h=="ahanta"
replace ethn_match=179 if strpos(hh_ethn_h, "akpafu")==1 //"akpafu, lolobi, likpe, bowiri, buem, .."
replace ethn_match=64 if hh_ethn_h=="akuapem"
replace ethn_match=64 if hh_ethn_h=="akwamu"
replace ethn_match=1 if hh_ethn_h=="akyem"
replace ethn_match=52 if hh_ethn_h=="aowin"
replace ethn_match=64 if hh_ethn_h=="asante"
replace ethn_match=64 if hh_ethn_h=="asen (assin)"
replace ethn_match=341 if hh_ethn_h=="avatime, nyongbo, tafi, logba"
replace ethn_match=64 if hh_ethn_h=="awutu, efutu, senya, breku"
replace ethn_match=758 if hh_ethn_h=="bimoba"
replace ethn_match=8 if hh_ethn_h=="boron (brong) (including banda)"
replace ethn_match=180 if hh_ethn_h=="builsa (kangyaga or kanjaga)"
replace ethn_match=151 if hh_ethn_h=="busanga"
replace ethn_match=223 if hh_ethn_h=="chamba (kyamba)"
replace ethn_match=64 if hh_ethn_h=="cherepong, larteh, anum-boso"
replace ethn_match=64 if hh_ethn_h=="chokosi (anufor)"
replace ethn_match=769 if hh_ethn_h=="dagarte (dagaba), lobi , wali (wala)"
replace ethn_match=282 if hh_ethn_h=="dagomba"
replace ethn_match=12 if strpos(hh_ethn_h, "dangme")==1 //"dangme (ada, shai, krobo, osudoku,nin.."
replace ethn_match=65 if hh_ethn_h=="evalue"
replace ethn_match=341 if hh_ethn_h=="ewe"
replace ethn_match=64 if hh_ethn_h=="fante"
replace ethn_match=114 if hh_ethn_h=="fulani"
replace ethn_match=359 if hh_ethn_h=="ga"
replace ethn_match=64 if hh_ethn_h=="gonja"
replace ethn_match=515 if hh_ethn_h=="kasena (paga)"
replace ethn_match=563 if hh_ethn_h=="kokomba"
replace ethn_match=1092 if hh_ethn_h=="kotokoli"
replace ethn_match=597 if hh_ethn_h=="kusasi"
replace ethn_match=64 if hh_ethn_h=="kwahu"
replace ethn_match=690 if hh_ethn_h=="mamprusi"
replace ethn_match=461 if hh_ethn_h=="mo"
replace ethn_match=713 if hh_ethn_h=="mosi"
replace ethn_match=796 if hh_ethn_h=="namnam (nabdom)"
replace ethn_match=796 if hh_ethn_h=="nankansi, talensi & gurense (frafra)"
replace ethn_match=282 if hh_ethn_h=="nanumba"
replace ethn_match=64 if hh_ethn_h=="nkonya"
replace ethn_match=64 if hh_ethn_h=="nzema"
replace ethn_match=566 if strpos(hh_ethn_h, "other grusi")==1 //"other grusi (e.g. lela, templensi, bi.."
replace ethn_match=461 if hh_ethn_h=="sisala"
replace ethn_match=1180 if hh_ethn_h=="vagala"
replace ethn_match=796 if hh_ethn_h=="wali (wala)"
replace ethn_match=3 if hh_ethn_h=="wangara (bambara, madingo & dyula)"
replace ethn_match=64 if hh_ethn_h=="wasa"
replace ethn_match=64 if strpos(hh_ethn_h, "yeji")==1 //"yeji, nchumuru, krachi, nawuri, bassa.."
replace ethn_match=1260 if hh_ethn_h=="zabrama"

* no aux_* dummies are created any more; -cap- keeps this from failing when no aux* variable exists
cap drop aux*
* keep every household; Atlas rows without a household are not added
merge m:1 ethn_match using `atlas', keep(match master) nogen
* flag households without an Atlas society name
gen miss_eth_indiv=v107==""   

*************************************************************************
* Geo matching 
*************************************************************************
/*
1. Grid-cell-level information on population density in 2019 from LandScan.
2. Aggregate population by subnational region (higher level) and ethnic group (Giuliano and Nunn, 2018 shapefile).
   This gives a dataset with the current population of each region that lives in areas that were occupied
   by each ethnic group, identified by its language (example: BOL.csv).
3. Merge language with ethnic group, based on Giuliano and Nunn, 2018.
4. Merge ethnic group with Atlas information.
5. Average the ethnographic traits at the regional level, weighted by the current population living in the area.
   This gives a dataset with the population-weighted average of the cultural traits for each region,
   to merge with the expenditure survey (example: regions_BOL.dta).
*/

preserve
	* Shape file with the global location of the ethnographic groups in the Atlas,
	* provided by Giuliano and Nunn (2018), converted to Stata format.
	tempfile map_data
	tempfile map_coordinates
	shp2dta using "$cultdata/Ethnologue_16_shapefile/langa_no_overlap_biggest_clean", data(`map_data')  coor(`map_coordinates') genid(_ID) replace 

	* Attach the Atlas society name (v107) to each polygon through the language id.
	* The correspondence between language and ethnic group is provided by Giuliano and Nunn (2018).
	use `map_data', clear
	rename ID id
	merge m:1 id using "$cultdata/EthnoAtlas_Ethnologue16_baseline_by_language.dta", keep(master match) nogen keepusing(v107)
	tempfile map_data2
	save `map_data2'

	* Population-weighted cultural variables per region
	import delimited "$cultdata/Country shape files/GHA.csv", clear  //source: LandScan 2019 population density
	keep if gid_0=="GHA"
	gen name_1_corr = name_1
	rename fid_langa FID_langa
	merge m:1 FID_langa using `map_data2', keep(master match) nogen keepusing(v107)    
	merge m:1 v107 using `atlas', keep(match master) keepusing($eth_vars) nogen
	collapse (mean) $eth_vars [aw=pob_km2] , by(name_1_corr)
	foreach trait of global eth_vars {
		rename `trait' `trait'_reg
	}
	* upper-case region name is the merge key with the survey data
	gen region_name = upper(name_1_corr)
	cap drop name_1_corr
	tempfile regions_GHA
	save `regions_GHA'
restore

* Combine with household survey data; regions without households are not added
gen region_name = upper(region3_name) 
merge m:1 region_name using `regions_GHA', keep(master match) nogen
* flag households whose region has no cultural data
gen miss_eth_geo = (patrilocal_reg==.)

		
*************************************************************************
* Rename/labeling
*************************************************************************

* Rename
*************
** remove prefix "hh_" from every variable that carries it
rename hh_* *

** demographics: women/men instead of female/male, "av" instead of "mean",
** and suffix _k instead of _c for the children's age variables
rename (nfemale nmale pfemale) (nwomen nmen femaleratio)
rename (meanage_a meanage_c meanage_f meanage_m meanage_g meanage_b) ///
	   (avage_a   avage_k   avage_f   avage_m   avage_g   avage_b)
rename (minage_c maxage_c) (minage_k maxage_k)

* Labels
**********
* Every statement is prefixed with -capture-, so a variable that does not
* exist in the data is skipped silently instead of stopping the do-file.

* identifiers and household composition
capture label variable hhno "HH no within cluster"
capture label variable hhid "Unique HH ID"
capture label variable region3 "Region ID"
capture label variable region3_name "Region name"
capture label variable region4 "District ID"
capture label variable region5 "Cluster ID"
capture label variable sweight "Sampling weight"
capture label variable size "HH size"
capture label variable nadult "Number of adults"
capture label variable nchild "Number of children"
capture label variable nadultchild "Number of adult children (aged below 18)"
capture label variable nae "Number of children, adult equivalent FAO"
capture label variable nmen "Number of men"
capture label variable nwomen "Number of women"
capture label variable nboy "Number of boys"
capture label variable ngirl "Number of girls"
capture label variable pboy "Proportion of boys"
capture label variable femaleratio "Proportion of women"

* age
capture label variable avage_a "Average age of adults"
capture label variable avage_f "Average age of women"
capture label variable avage_m "Average age of men"
capture label variable avage_k "Average age of kids"
capture label variable avage_g "Average age of girls"
capture label variable avage_b "Average age of boys"
capture label variable maxage_a "Maximum age of adults"
capture label variable maxage_f "Maximum age of women"
capture label variable maxage_m "Maximum age of men"
capture label variable maxage_k "Maximum age of kids"
capture label variable maxage_g "Maximum age of girls"
capture label variable maxage_b "Maximum age of boys"
capture label variable minage_a "Minimum age of adults"
capture label variable minage_f "Minimum age of women"
capture label variable minage_m "Minimum age of men"
capture label variable minage_k "Minimum age of kids"
capture label variable minage_g "Minimum age of girls"
capture label variable minage_b "Minimum age of boys"
capture label variable minage "Age of the youngest HH members"
capture label variable childage "Child age threshold"

* education and employment shares
capture label variable pnoeduc_f "Women with no education (as % of women in HH)"
capture label variable pnoeduc_m "Men with no education (as % of men in HH)"
capture label variable peductert_f "Women with tert. education (as % of women in HH)"
capture label variable peductert_m "Men with tert. education (as % of men in HH)"
capture label variable pemployee_a "Employed adults (as % of adults in HH)"
capture label variable pemployee_f "Employed women (as % of women in HH)"
capture label variable pemployee_m "Employed men (as % of men in HH)"
capture label variable pselfempl_a "Self-employed adults (as % of adults in HH)"
capture label variable pselfempl_f "Self-employed women (as % of women in HH)"
capture label variable pselfempl_m "Self-employed men (as % of men in HH)"
capture label variable pworking_a "Working adults (as % of adults in HH)"
capture label variable pworking_f "Working women (as % of women in HH)"
capture label variable pworking_m "Working men (as % of men in HH)"

* household head
capture label variable agey_h "Age of HH head"
capture label variable age40_h "HH head is older than 40 (=1)"
capture label variable female_h "HH head is female (=1)"
capture label variable noeduc_h "HH head has no education (=1)"
capture label variable eductert_h "HH head has tertiary education (=1)"
capture label variable ethn_h "HH head's ethnic group"
capture label variable married_h "HH head is married (=1)"
capture label variable cohab_h "HH head is cohabiting with partner (=1)"
capture label variable divorced_h "HH head is divorced (=1)"
capture label variable separated_h "HH head is separated (=1)"
capture label variable widow_h "HH head is widow (=1)"
capture label variable single_h "HH head is single (=1)"

* income
capture label variable wage "Sum of indivudal wages, annual (local currency)"
capture label variable wage_f "Sum of women's indivudal wages, annual (local currency)"
capture label variable wage_m "Sum of men's indivudal wages, annual (local currency)"
capture label variable inc "Sum of individual incomes, annual (local currency)"
capture label variable inc_f "Sum of women's individual incomes, annual (local currency)"
capture label variable inc_m "Sum of men's individual incomes, annual (local currency)"
capture label variable totinc "Total HH income, annual (local currency)"
capture label variable relinc_f "Relative income of women"

* data-quality flags
capture label variable nohead "No HH head reported (=1)"
capture label variable agemiss "Age not reported at least for one HH member (=1)"
capture label variable gendermiss "Gender not reported at least for one HH member (=1)"

* dwelling
capture label variable urban "Urban (=1)"
capture label variable homeown "HH owns house (=1)"
capture label variable landown "HH owns agricultural land (=1)"
capture label variable anyrent "HH pays rent (=1)"

* expenditure
capture label variable cloth_m "Annual HH expenditure on men's clothing (local currency)"
capture label variable cloth_f "Annual HH expenditure on women's clothing (local currency)"
capture label variable cloth_c "Annual HH expenditure on children's clothing (local currency)"
capture label variable food "Annual HH food expenditure (local currency)"
capture label variable pubexp "Annual HH public expenditure (local currency)"
capture label variable privexp "Annual HH private expenditure (local currency)"
capture label variable exp "Annual HH expenditure (local currency)"
capture label variable rexp_wb "Annual HH expenditure, WB estimate (local currency)"

* cultural variables
capture label variable ethn_match "Ethnic group no."
capture label variable v107 "Society name"
capture label variable patrilocal "Patrilocal (=1)"
capture label variable neolocal "Neolocal (=1)"
capture label variable matrilocal "Matrilocal (=1)"
capture label variable matrilineal "Matrilineal (=1)"
capture label variable patrilineal "Patrilineal (=1)"
capture label variable miss_eth_indiv "Missing ethno data, individual matching"
capture label variable miss_eth_geo "Missing ethno data, geo matching"
capture label variable region_name "Region name (used for matching ethno data)"
capture label variable patrilocal_reg "Patrilocality rate in region"
capture label variable neolocal_reg "Neolocality rate in region"
capture label variable matrilocal_reg "Matrilocality rate in region"
capture label variable matrilineal_reg "Matrilineality rate in region"
capture label variable patrilineal_reg "Patrilineality rate in region"
 

* Save the household-level analysis file.
* Global replication is not among the directory globals set at the top of
* this file (which starts with -macro drop _all-). If it is empty, the path
* below would collapse to "/GHA.dta", so fall back to the replication data
* folder held in global readydata.
* NOTE(review): confirm that $readydata is the intended destination.
if "$replication"=="" global replication "$readydata"
save "$replication/GHA.dta", replace
	
	
*************************************************************************
*  Individual-level data on women's control over earnings
*************************************************************************
* Sample: married women aged 17 to `maxage' with reported income of at least 1.
local maxage=67

use "$origdata/GHA_GLSS_2017/1.data/g7sec4.dta", clear
merge 1:1 phid using "$origdata/GHA_GLSS_2017/1.data/g7sec1.dta" , keepusing(s1q6 s1q13 loc2 s1q10) 
rename s1q13 ethn_h

* Ethnic-group code -> row number of the matching society in the Atlas file.
* Codes are grouped by target; each code appears exactly once.
gen ethn_match = .
replace ethn_match = 64   if inlist(ethn_h, 1, 2, 4, 5, 8, 9, 11, 14, 15, 16, 18, 43, 44, 45, 46, 47)
replace ethn_match = 124  if ethn_h==3
replace ethn_match = 179  if ethn_h==41
replace ethn_match = 1    if ethn_h==6
replace ethn_match = 52   if ethn_h==7
replace ethn_match = 341  if inlist(ethn_h, 30, 42)
replace ethn_match = 758  if ethn_h==51
replace ethn_match = 8    if ethn_h==10
replace ethn_match = 180  if ethn_h==61
replace ethn_match = 151  if ethn_h==81
replace ethn_match = 223  if ethn_h==57
replace ethn_match = 769  if ethn_h==62
replace ethn_match = 282  if inlist(ethn_h, 63, 68)
replace ethn_match = 12   if ethn_h==21
replace ethn_match = 65   if ethn_h==13
replace ethn_match = 114  if ethn_h==93
replace ethn_match = 359  if inlist(ethn_h, 20, 22)
replace ethn_match = 515  if ethn_h==71
replace ethn_match = 563  if ethn_h==52
replace ethn_match = 1092 if ethn_h==56
replace ethn_match = 597  if ethn_h==64
replace ethn_match = 690  if ethn_h==65
replace ethn_match = 461  if inlist(ethn_h, 72, 73)
replace ethn_match = 713  if ethn_h==69
replace ethn_match = 796  if inlist(ethn_h, 59, 66, 67)
replace ethn_match = 566  if ethn_h==75
replace ethn_match = 1180 if ethn_h==74
replace ethn_match = 3    if ethn_h==82
replace ethn_match = 1260 if ethn_h==94

* attach the Atlas traits; Atlas rows without a person are not added
cap drop _merge
merge m:1 ethn_match using `atlas', keep(match master) keepusing(v107 patrilocal matrilocal neolocal) 

* income = sum of three reported amounts, with system-missing treated as zero
sum s4aq47a
foreach v in s4aq47a s4aq45a s4aq49a {
	replace `v' = 0 if `v'==.
}
gen income = s4aq45a + s4aq49a + s4aq47a
gen work = (income>=1)
gen married = inlist(s1q6, 1, 2)
gen female = (sex==2)
* 1 if the person named in s4aq50 is the respondent herself; defined for earners only
gen decides = (s4aq50==pid) if work==1 
gen urban = (loc2==1)

* sample restrictions
keep if sex==2 & married==1 & age>=17 & age<=`maxage' & work==1

keep clust nh pid age decides urban region patrilocal 

capture label variable age "Age"
capture label variable patrilocal "Patrilocal (=1)"
capture label variable region "Region"
capture label variable urban "Urban (=1)"
capture label variable decides "Control over own earnings"

* Save the individual-level file.
* Global replication is not among the directory globals set at the top of
* this file (which starts with -macro drop _all-). If it is empty, the path
* below would collapse to "/GHA_women.dta", so fall back to the replication
* data folder held in global readydata.
* NOTE(review): confirm that $readydata is the intended destination.
if "$replication"=="" global replication "$readydata"
save "$replication/GHA_women.dta", replace



